/*
 ============================================================================
 Name        : mergeToSuffixTree.c
 Author      : M.Barsky
 Version     :
 Copyright   : Your copyright notice
 Description : 
 ============================================================================
 */

#include <stdio.h>
#include <stdlib.h>
#include "Partitions.h"
#include "SuffixTree.h"
#include <time.h>

#define debug 1
#define OUTPUT_BUFFER 5120   //5000*1024 apx 5 mb of suffix start positions

FILE *outputFile;
FILE *suffixArrayFile;
FILE *dividersFile;
extern int outputFileNumberCounter;
extern int numBitsInLong;


int numberOfChunks;
int *lengths;
int *fileNumbers;
int input_buffer_max;
int output_buffer_max;
int *totallengthsdoubled;


char smallBinaryFileNamePrefix[MAX_PATH_LENGTH];
char smallMergedFileNameprefix[MAX_PATH_LENGTH];
char smallSuffixArrayFileNamePrefix[MAX_PATH_LENGTH];

FILE *inputStringFile;
unsigned int **binaryString;
int memoryForInputBuffers;

int mergePartitions(char *outputNamePrefix, char *smallSuffixArrayFileNamePrefix, char *smallBinaryFileNamePrefix, char *lengthsFileName, 
					char * fileNumbersFileName,char *binarylengthsFileName)
{
	int i;
	char currentInputFileName[MAX_PATH_LENGTH];
	int maxChunkSize=0;
	int result;
	char dividersFileName [MAX_PATH_LENGTH];
	int binarylen;

	//1. determine partitions of the input
	numberOfChunks=getNumberOfChunks(fileNumbersFileName);

	lengths=(int*) calloc (numberOfChunks, sizeof(int)); 
	fileNumbers=(int*) calloc (numberOfChunks, sizeof(int));
	totallengthsdoubled=(int*) calloc (numberOfChunks, sizeof(int)); 

	readPartitionInfo(lengthsFileName,fileNumbersFileName,binarylengthsFileName,
					   lengths, fileNumbers,totallengthsdoubled,numberOfChunks);
	
	
	sprintf(smallMergedFileNameprefix,"%s_tree", outputNamePrefix);

	//2. calculate size of input and output buffers
	for(i=0;i<numberOfChunks;i++)
	{
		if(maxChunkSize<lengths[i])
			maxChunkSize=lengths[i];
	}

	if(debug)
		printf("max partition size =%d\n",maxChunkSize);


	input_buffer_max=memoryForInputBuffers/(numberOfChunks*sizeof(Suffix));

	if(debug)
		printf("input buffer size = %d, output buffer size = %d \n",input_buffer_max,output_buffer_max);
	
	
	
	//3 fill binary input - at most 1500 MB RAM for input string
	binaryString=(unsigned int **) malloc ( sizeof(unsigned int *)*numberOfChunks);
	for(i=0;i<numberOfChunks;i++)
	{
		sprintf(currentInputFileName,"%s_%i", smallBinaryFileNamePrefix,i);
		binaryString[i]=(unsigned int *) malloc ( sizeof(unsigned int )*(totallengthsdoubled[i]/numBitsInLong+1));
		if(!(inputStringFile=fopen(currentInputFileName,"rb")))
		{
			printf("Cannot open input string File %s for reading\n",currentInputFileName);
			exit(1);
		}

		fseek (inputStringFile, 0, SEEK_END);
    

		binarylen=ftell(inputStringFile)/sizeof(unsigned int);
		rewind(inputStringFile);
		result=fread(binaryString[i],sizeof(unsigned int),binarylen,inputStringFile);
		
		if(result!=binarylen)
		{
			printf("binary string %i reading error\n",i);
			exit(1);
		}
		fclose(inputStringFile);
	}


	//5. open dividerFile for writing boundaries	
	sprintf(dividersFileName,"%s_dividers", outputNamePrefix);
	if(!(dividersFile=fopen(dividersFileName,"wb")))
	{
		printf("Cannot open dividersFile %s for writing\n",dividersFileName);
		exit(1);
	}

	//6. fill buffers for the first time and build heap
	// from the first elements of partitions
	fillInitialBuffers();
	initializeMerge();

	//7. perform merge
	mergeSuffixArrays();	

	fclose(dividersFile);
	
	return 0;
}




int main(int argc, char *argv[])
{
	
	char outputNamePrefix[MAX_PATH_LENGTH];	
	char inputfilenameprefix [MAX_PATH_LENGTH];		
	char smallBinaryFileNamePrefix[MAX_PATH_LENGTH];
	char lengthsFileName[MAX_PATH_LENGTH];
	char fileNumbersFileName[MAX_PATH_LENGTH];
	
	char bitSequencesLengthsFileName[MAX_PATH_LENGTH];

	clock_t startclock, stopclock;
	
	//to calculate input_buffer_size: [(memory - memory for binary input)/sizeof(STNode)]/numberoffiles
	//f.e. for 4000 files and 2GB memory: 2000000000/4000/25=500000/25=20000

	if(argc<6)
	{
		printf("mergeToSuffixTree <tempdir> <outputdir> <treenameprefix> <memforinputbuffers> <maxsubtree>\n");
		return 1;
	}

	

	sprintf(inputfilenameprefix,"%s%s", argv[1], argv[3]);
	sprintf(outputNamePrefix,"%s%s", argv[2], argv[3]);
		
	sprintf(smallBinaryFileNamePrefix,"%s_smallbinary", inputfilenameprefix);
	sprintf(lengthsFileName,"%s_lengths", inputfilenameprefix);
	sprintf(fileNumbersFileName,"%s_filenumbers", inputfilenameprefix);
	sprintf(smallSuffixArrayFileNamePrefix,"%s_small_sarray", inputfilenameprefix);
	
	sprintf(bitSequencesLengthsFileName,"%s_binarylengths", inputfilenameprefix);

	memoryForInputBuffers=atoi(argv[4]); 
	output_buffer_max=atoi(argv[5]); 

	startclock = clock();

	if(mergePartitions(outputNamePrefix, smallSuffixArrayFileNamePrefix, smallBinaryFileNamePrefix, lengthsFileName, 
					fileNumbersFileName, bitSequencesLengthsFileName))
		return 1;

	stopclock = clock();
	
	printf("%f mseconds total\n", (double) (stopclock-startclock));	
	
	
	return 0;
}
